Massaging
#Convert to dataframe
doubleclick <- as.data.frame(doubleclick)
#Look for weird stuff
table(doubleclick$`Match Type`)
##
## Advanced Broad Exact N/A Standard
## 969 2591 22 48 880
# The NAs have to be removed.
doubleclick_clean <- na.omit(doubleclick)
# Notice how the number of rows gets reduced
print(nrow(doubleclick_clean))
## [1] 3286
# Look for Spelling mistakes
unique(doubleclick_clean $`Bid Strategy`)
## [1] "Position 2-5 Bid Strategy" "Position 1- 3"
## [3] "Position 1-2 Target" "Position 5-10 Bid Strategy"
## [5] "Position 1-4 Bid Strategy" "Position 1 -2 Target"
## [7] "Postiion 1-4 Bid Strategy" "Pos 3-6"
# Replace Typos
doubleclick_clean$`Bid Strategy` <- gsub("Postiion 1-4 Bid Strategy","Position 1-4 Bid Strategy",doubleclick_clean$`Bid Strategy`)
doubleclick_clean$`Bid Strategy` <- gsub("Position 1 -2 Target","Position 1-2 Target",doubleclick_clean$`Bid Strategy`)
Descriptive
# Create data set for analysis
sem <- doubleclick_clean[,c('Campaign','Keyword','Keyword Group','Publisher Name', 'Bid Strategy','Engine Click Thru %','Match Type','Trans. Conv. %','Total Cost/ Trans.','Impressions')]
# Get a big picture understanding of the data
summary(sem)
## Campaign Keyword Keyword Group Publisher Name
## Length:3286 Length:3286 Length:3286 Length:3286
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Bid Strategy Engine Click Thru % Match Type Trans. Conv. %
## Length:3286 Min. : 0.01287 Length:3286 Min. : 0.0000
## Class :character 1st Qu.: 1.63934 Class :character 1st Qu.: 0.0000
## Mode :character Median : 4.13532 Mode :character Median : 0.0000
## Mean : 10.90149 Mean : 0.3562
## 3rd Qu.: 10.12303 3rd Qu.: 0.0000
## Max. :200.00000 Max. :100.0000
## Total Cost/ Trans. Impressions
## Min. : 0.00 Min. : 1
## 1st Qu.: 0.00 1st Qu.: 31
## Median : 0.00 Median : 178
## Mean : 31.31 Mean : 7018
## 3rd Qu.: 0.00 3rd Qu.: 847
## Max. :9597.17 Max. :4492536
str(sem)
## 'data.frame': 3286 obs. of 10 variables:
## $ Campaign : chr "Air France Brand & French Destinations" "Air France Global Campaign" "Unassigned" "Unassigned" ...
## $ Keyword : chr "air discount france ticket" "[airfrance]" "air france online booking" "airfrance,com" ...
## $ Keyword Group : chr "France" "Air France" "Unassigned" "Unassigned" ...
## $ Publisher Name : chr "MSN - Global" "Google - Global" "Overture - Global" "Overture - Global" ...
## $ Bid Strategy : chr "Position 2-5 Bid Strategy" "Position 1- 3" "Position 1-2 Target" "Position 1-2 Target" ...
## $ Engine Click Thru %: num 11.11 14.71 2.52 5.82 23.08 ...
## $ Match Type : chr "Broad" "Exact" "Standard" "Standard" ...
## $ Trans. Conv. % : num 100 3.39 12.5 4.76 33.33 ...
## $ Total Cost/ Trans. : num 0.388 1.156 2.2 2.625 5.213 ...
## $ Impressions : num 9 401 318 722 13 ...
# Find out most frequently used bid strategy
table(sem$`Bid Strategy`)
##
## Pos 3-6 Position 1- 3
## 45 264
## Position 1-2 Target Position 1-4 Bid Strategy
## 285 151
## Position 2-5 Bid Strategy Position 5-10 Bid Strategy
## 333 2208
# Find out unique publishers
unique(sem$`Publisher Name`)
## [1] "MSN - Global" "Google - Global" "Overture - Global"
## [4] "Google - US" "Overture - US" "Yahoo - US"
# Average out the clickthroughs per publisher
clickthrough_publisher <- aggregate(sem$`Engine Click Thru %`, by=list(sem$`Publisher Name`), FUN=mean)
# Visualize average clickthroughs per publisher
plot_ly(clickthrough_publisher, x = clickthrough_publisher$`Group.1`, y=~`x`)
## No trace type specified:
## Based on info supplied, a 'bar' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#bar
# Average out the costs per click per publisher
costs_publisher <- aggregate(sem$`Total Cost/ Trans.`, by=list(sem$`Publisher Name`), FUN=mean)
# Visualize average clickthroughs per engine
plot_ly(costs_publisher, x = costs_publisher$`Group.1`, y=~`x`)
## No trace type specified:
## Based on info supplied, a 'bar' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#bar
It seems like Google-US has the highest clickthrough rate and the costs / click are unusually high for Yahoo - US. One reason could be the advanced Match Type that gets Air France uses on that engine.
# Visualize impressions per publisher
ggplot(data=doubleclick_clean, aes(x=sem$`Publisher Name`, y=sem$`Impressions`, color=sem$`Match Type`)) + geom_point() + scale_y_continuous(trans='log10')

# Visualize impressions per campaign
plot_ly(doubleclick_clean, x = doubleclick_clean$`Campaign`, y=~Impressions, type='bar')
Most impressions come from unassigned keywords.
# Select observations with the highest total cost per transaction
sem_sub <- subset(sem,subset = `Total Cost/ Trans.` > 0)
# Visualize the costs per transactions for different Publisher
p <- plot_ly(sem_sub, y = ~`Total Cost/ Trans.`, color = I("black"),
alpha = 0.2, boxpoints = "suspectedoutliers")
p1 <- p %>% add_boxplot(x = ~`Publisher Name`)
p1
# Visualize the converted transactions for different bid strategies
convert_bid <- plot_ly(sem_sub, y = ~`Trans. Conv. %`, color = I("black"),
alpha = 0.2, boxpoints = "suspectedoutliers")
p2 <- p %>% add_boxplot(x = ~`Bid Strategy`)
p2
# Visualize the numerical variables in 3D-Space
plot_ly(sem, x = ~`Engine Click Thru %`, y = ~`Trans. Conv. %`, z =~`Total Cost/ Trans.`) %>%
add_markers(color = ~`Trans. Conv. %`)
Keywords
ggplot(data=doubleclick_clean, aes(x=sem$`Trans. Conv.`, y=sem$`Total Cost/ Trans.`, color=sem$`Publisher Name`)) + geom_point() + scale_y_continuous(trans='log10') + scale_x_continuous(trans='log10')
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous x-axis
